Run t-SNE on CD4+ events which express one or more cytokines. Repeat for Non-CD4 events.
The question being asked is “What are the memory and activation profiles of Ag-specific T cells?”

Stratify by Cohort and Antigen (S1, S2, NCAP, VEMP)
Color by
- Degree of functionality
- CD45RA
- CCR7
- HLA-DR
- CD38
Boxplots?

Also:
- Given the elevated IL2 in DMSO Hosp, it might actually also be interesting to take a look at the DMSO signal.
- Take a look at the difference in non-bg-corrected proportions and bg-corrected-proportions in order to get an idea of what amount of the t-SNE data might be background noise.
- Note that the term Not-CD4 is a bit ambiguous. The “4+” gate contains “CD4+CD8-” events, and the “NOT4+” gate contains “CD4-CD8+/-” events.

library(openCyto)
library(CytoML) # 1.12.0
library(flowCore) # required for description()
library(flowWorkspace) # required for gh_pop_get_data()
library(here)
library(tidyverse)
library(Rtsne)
library(ggplot2)
library(scales)
library(patchwork)
library(hues)
library(RColorBrewer)
library(ggrepel)
source(here::here("scripts/20200604_Helper_Functions.R")) # for distributeEvents() and sampleGatingHierarchy()
date <- 20200617
save_output <- TRUE
rerun_tsne <- TRUE

gs <- load_gs(here::here("out/GatingSets/20200611_HAARVI_ICS_GatingSet_AllBatches"))
## loading R object...
## loading tree object...
## Done
dput(gh_get_pop_paths(gs))
## c("root", "/Time", "/Time/LD-3+", "/Time/LD-3+/1419-3+", "/Time/LD-3+/1419-3+/S", 
## "/Time/LD-3+/1419-3+/S/Lymph", "/Time/LD-3+/1419-3+/S/Lymph/4+", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
## "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/CD38+", 
## "/Time/LD-3+/1419-3+/S/Lymph/HLADR+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513", 
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF")

CD4

Perform CD4 analysis all the way through first.

Sample events for t-SNE and QC

cd4_gates_for_tsne <- c(
  "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
  "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
  "/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
cd4_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513", 
                        "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF")

Still need to add boolean gates.

gs_pop_add(gs, eval(substitute(flowWorkspace::booleanFilter(v),
                                       list(v = as.symbol(paste(cd4_cytokine_gates, collapse="|"))))),
           parent = "/Time/LD-3+/1419-3+/S/Lymph/4+", name = "CD4_CytokinePos")
## [1] 29
recompute(gs, "/Time/LD-3+/1419-3+/S/Lymph/4+")
## ..................................................................................................................................................................................................................................................................................................................................................................................................................done!
cd4_cytokine_pos_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_CytokinePos"

pop_counts <- pData(gs) %>% 
  left_join(gs_pop_get_count_fast(gs, subpopulations = cd4_cytokine_pos_parentGate),
            by = c("rowname" = "name")) %>% 
  dplyr::rename(CD4_CytokinePos = Count) %>% 
  dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, CD4_CytokinePos)

37C, 116C (DMSO), and 07B Spike 2 were filtered out for COMPASS due to low cell counts.
37C also has ~2-5% events in Live CD3+ gate. 07B Spike 2 has low-ish 13% Live CD3+ gate membership.
Include them all here, but note that for some of them, the low event counts means they will be under-represented in t-SNE.

head(pop_counts)
##             rowname Batch SAMPLE ID    STIM       Cohort CD4_CytokinePos
## 1 112405.fcs_332150     1     15545    DMSO Hospitalized             694
## 2 112407.fcs_341600     1     15545     SEB Hospitalized           38139
## 3 112409.fcs_398100     1     15545    VEMP Hospitalized             954
## 4 112411.fcs_348925     1     15545 Spike 1 Hospitalized            1022
## 5 112413.fcs_296775     1     15545 Spike 2 Hospitalized            1004
## 6 112415.fcs_378400     1     15545    NCAP Hospitalized            1359
sub_group_size_calculations <- pop_counts %>% 
  dplyr::filter(Cohort != "Healthy control" & !is.na(Cohort) & !(STIM %in% c("DMSO", "SEB"))) %>% # TRIMAS are NA
  group_by(Batch, Cohort, STIM) %>% 
  dplyr::summarise(n_Patients = n(),
                   min_CD4_CytokinePos = min(CD4_CytokinePos),
                   total_CD4_CytokinePos = sum(CD4_CytokinePos),
                   n_x_min_CD4_CytokinePos = n_Patients * min_CD4_CytokinePos)
sub_group_size_calculations %>% 
  arrange(n_x_min_CD4_CytokinePos) %>% 
  head() %>% 
  knitr::kable()
Batch Cohort STIM n_Patients min_CD4_CytokinePos total_CD4_CytokinePos n_x_min_CD4_CytokinePos
2 Non-hospitalized Spike 1 13 13 4317 169
2 Non-hospitalized Spike 2 13 15 1763 195
2 Non-hospitalized NCAP 13 30 3860 390
2 Non-hospitalized VEMP 13 36 11120 468
2 Hospitalized Spike 2 7 120 1495 840
1 Hospitalized Spike 2 6 171 2968 1026

Above: Based on the n_x_min_CD4_CytokinePos column, if we maintained equal representation of the patients within each group (by batch, cohort, and stim) there would only be 169 events per group, leading to a total of only 169x3x4 = 2028 events.
The limiting group is: B2, Non-hospitalized, Spike 1

sub_group_size_calculations %>% 
  arrange(total_CD4_CytokinePos) %>% 
  head() %>% 
  knitr::kable()
Batch Cohort STIM n_Patients min_CD4_CytokinePos total_CD4_CytokinePos n_x_min_CD4_CytokinePos
2 Hospitalized Spike 2 7 120 1495 840
2 Non-hospitalized Spike 2 13 15 1763 195
1 Hospitalized Spike 2 6 171 2968 1026
2 Hospitalized NCAP 7 312 3431 2184
3 Hospitalized Spike 2 7 239 3546 1673
1 Non-hospitalized Spike 2 13 130 3656 1690

Above: If we allow unequal patient representation, we can have up to 1495 events per group.
Let’s settle for 1000x*3x4 = 12000 events total in the t-SNE run. So, 1000 events per group.

cd4_cytokinepos_sampleSizes <- pop_counts %>%
  dplyr::filter(Cohort != "Healthy control" & !is.na(Cohort) & !(STIM %in% c("DMSO", "SEB"))) %>% 
  group_by(Batch, Cohort, STIM) %>% 
  nest() %>% 
  ungroup() %>% 
  mutate(nsamp = map2(1000, data, function(totalEvents, df) {
    distributeEvents(totalEvents, df$CD4_CytokinePos)
  })) %>% 
  unnest(cols = c(data, nsamp))

Make sure all groups are the same size

cd4_cytokinepos_sampleSizes %>% 
  group_by(Batch, Cohort, STIM) %>% 
  summarise(nsamp_sum = sum(nsamp)) %>% 
  dplyr::pull(nsamp_sum) %>% 
  unique()
## [1] 1000

But keep in mind that there is lopsided patient representation even within groups:

cd4_cytokinepos_sampleSizes %>% 
  arrange(nsamp) %>% 
  head(n=10)
## # A tibble: 10 x 7
##    Batch STIM    Cohort         rowname        `SAMPLE ID` CD4_CytokinePos nsamp
##    <dbl> <chr>   <chr>          <chr>          <chr>                 <int> <dbl>
##  1     2 Spike 1 Non-hospitali… 113851.fcs_27… 37C                      13    13
##  2     2 Spike 2 Non-hospitali… 113853.fcs_23… 37C                      15    15
##  3     2 NCAP    Non-hospitali… 113855.fcs_20… 37C                      30    30
##  4     2 VEMP    Non-hospitali… 113849.fcs_25… 37C                      36    36
##  5     2 Spike 2 Non-hospitali… 113685.fcs_13… 80C                      55    55
##  6     2 Spike 2 Non-hospitali… 113781.fcs_12… 76C                      61    61
##  7     1 VEMP    Non-hospitali… 112629.fcs_32… 162C                    893    76
##  8     1 Spike 1 Non-hospitali… 112643.fcs_23… 169C                    743    76
##  9     1 Spike 2 Non-hospitali… 112488.fcs_32… 141C                    311    76
## 10     1 NCAP    Non-hospitali… 112859.fcs_36… 51C                     708    76
cd4_cytokinepos_sampleSizes_4plot <- cd4_cytokinepos_sampleSizes %>%
  mutate(the_label = ifelse(nsamp < 50,
                            paste0("Batch ", Batch, ", ", `SAMPLE ID`, ", ", STIM), NA)) %>% 
  mutate(Cohort = factor(Cohort,
                         levels = c("Non-hospitalized", "Hospitalized"),
                         labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(cd4_cytokinepos_sampleSizes_4plot,
       aes(factor(Cohort), nsamp, label = the_label)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.15, height = 0) +
  theme_bw(base_size=20) +
  labs(title="ICS CD4 t-SNE patient representation",
       y="Events Sampled for t-SNE") +
  geom_text() +
  facet_grid(Batch ~ STIM) +
  theme(axis.title.x = element_blank())
## Warning: Removed 232 rows containing missing values (geom_text).

Reproducibility is important

set.seed(date)

call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName, currentSampleSize) {
  # print(sprintf("Sampling data from %s", currentSampleName))
  sampleGatingHierarchy(gs[[currentSampleName]], cd4_cytokine_pos_parentGate, currentSampleSize, cd4_gates_for_tsne)
}
cd4_cytokinepos_data4tsne <- map2_dfr(cd4_cytokinepos_sampleSizes$rowname, cd4_cytokinepos_sampleSizes$nsamp,
                                     call_sampleGatingHierarchy_for_cd4)
knitr::kable(head(cd4_cytokinepos_data4tsne))
name EXPERIMENT NAME $DATE SAMPLE ID PATIENT ID STIM WELL ID PLATE NAME filename rowname Sample ID Cohort Age Sex Race Hispanic? Days symptom onset to visit 1 Pair ID Batch /Time/LD-3+/1419-3+/S/Lymph/4+/CD4_CytokinePos /Time/LD-3+/1419-3+/S/Lymph/4+/107a /Time/LD-3+/1419-3+/S/Lymph/4+/154 /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 /Time/LD-3+/1419-3+/S/Lymph/4+/TNF /Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+ /Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+ /Time/LD-3+/1419-3+/S/Lymph/CD38+ /Time/LD-3+/1419-3+/S/Lymph/HLADR+ Time FSC-A FSC-H SSC-A SSC-H CD8b BB700 TNFa FITC CD107a PE-Cy7 CD154 PE-Cy5 CD3 ECD IL2 PE CD4 APC-H7 IL17a Ax700 IL4/5/13 APC CD14/CD19 BV785 CCR7 BV711 CD38 BV605 L/D IFNg V450 CD45RA BUV737 HLADR BUV395
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 0 0 0 1 0 0 0 1 1 1 0 23.94000 78295.2 63400 13757.31 13208 780.8013 182.53427 648.0172 1346.5171 2728.276 2440.7312 1567.578 474.6469 650.5800 928.9816 1786.755 2063.9739 844.7427 680.6394 2495.806 259.3407
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 0 0 0 0 1 0 0 1 1 1 0 65.03799 101052.6 86636 18849.42 18764 704.6745 344.05429 602.1383 1278.8933 3131.226 623.1863 1887.669 2623.1912 452.9386 1564.1038 3225.979 2081.0901 1191.9154 456.8972 2579.319 651.7107
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 1 0 0 0 0 0 0 1 0 0 0 21.97200 146510.5 123037 26374.05 25209 572.1346 -13.31206 1489.6616 866.1031 2911.237 577.0376 1525.547 952.7986 694.1076 873.2791 2209.903 887.0472 1413.6069 705.7365 1063.861 854.5002
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 1 0 0 0 0 0 0 1 0 0 0 40.56900 119913.6 100572 20781.69 19036 592.0034 661.64246 2065.9065 900.2537 2759.695 778.2350 1830.089 1198.0159 535.0631 882.6482 2149.851 1145.0156 1672.9725 859.9519 1517.605 735.4972
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 1 0 0 0 0 0 0 1 1 0 0 60.07300 104987.9 91785 18149.07 17661 632.3518 574.70831 2038.7100 857.8076 2891.542 690.3101 1556.152 1083.3341 609.9719 429.5510 2406.151 1614.5096 1753.3729 678.1209 2451.481 500.9050
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 1 0 0 0 0 0 0 1 0 0 0 49.07500 104532.7 89304 18408.33 17692 544.7924 564.35364 1777.3187 902.0987 2861.205 782.8384 1633.887 1280.8867 420.9029 1127.9441 2312.682 1331.2260 1601.4375 847.6118 1514.634 571.2283
dim(cd4_cytokinepos_data4tsne)
## [1] 24000    52

Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.

cytokine_dominance <- cd4_cytokinepos_data4tsne %>% 
  group_by(Batch) %>% 
  summarise_at(cd4_cytokine_gates, sum) %>%
  t() %>%
  as.data.frame() %>%
  set_names(c("B1", "B2", "B3")) %>%
  rownames_to_column("Cytokine_Gate") %>%
  dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
Cytokine_Gate B1 B2 B3
/Time/LD-3+/1419-3+/S/Lymph/4+/107a 1579 2177 1681
/Time/LD-3+/1419-3+/S/Lymph/4+/154 2249 3587 2298
/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG 1330 1398 931
/Time/LD-3+/1419-3+/S/Lymph/4+/IL2 2070 2419 1992
/Time/LD-3+/1419-3+/S/Lymph/4+/IL17 2356 265 1805
/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 815 1649 1429
/Time/LD-3+/1419-3+/S/Lymph/4+/TNF 1988 2942 2315
cytokine_dominance %>%
  pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>% 
  mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>% 
  ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
  theme_bw(base_size=18) +
  geom_bar(stat="identity") +
  facet_grid(. ~ Batch) +
  labs(title = "CD4 t-SNE Run Cytokine Dominance by Batch")

The cytokine imbalance doesn’t look too bad.

Run t-SNE

# t-SNE can take a long time, so there is a rerun_tsne switch
if(rerun_tsne) {
  print("Running t-SNE")
  print(Sys.time())
  cd4_cytokinepos_tsne_out <- cd4_cytokinepos_data4tsne %>% 
    # Run CD3, co-receptor, cytokine, memory, and activation markers through t-SNE
    dplyr::select("CD3 ECD", "CD8b BB700", "CD4 APC-H7",
                  "TNFa FITC", "CD107a PE-Cy7", 
                  "CD154 PE-Cy5", "IL2 PE", "IL17a Ax700", 
                  "IL4/5/13 APC", "IFNg V450", 
                  "CCR7 BV711", "CD45RA BUV737",
                  "CD38 BV605", "HLADR BUV395") %>% 
    Rtsne::Rtsne(check_duplicates = FALSE)
  print(Sys.time())
  cd4_cytokinepos_dat <- cbind(as.data.frame(cd4_cytokinepos_tsne_out$Y) %>% 
    dplyr::rename(x = V1, y = V2),
    cd4_cytokinepos_data4tsne)
  if(save_output) {
    print("Loading saved t-SNE run")
    saveRDS(cd4_cytokinepos_dat, here::here(sprintf("out/tSNE/%s_ICS_CD4_CytokinePos_tSNE.rds", date)))
  }
} else {
  # Assuming t-SNE results are already saved
  cd4_cytokinepos_dat <- readRDS(here::here(sprintf("out/tSNE/%s_ICS_CD4_CytokinePos_tSNE.rds", date)))
}
## [1] "Running t-SNE"
## [1] "2020-06-19 15:53:30 PDT"
## [1] "2020-06-19 15:55:14 PDT"
## [1] "Loading saved t-SNE run"

Plot t-SNE results

# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
                   metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
                               here::here("data/Arial_afm/ArialMT-Bold.afm"), 
                               here::here("data/Arial_afm/ArialMT-Italic.afm"),
                               here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)

boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")

cd4_cytokinepos_dat <- cd4_cytokinepos_dat %>% 
  mutate(cytokine_degree = rowSums(dplyr::select(., all_of(cd4_cytokine_gates))))
cd4_cytokinepos_dat <- cd4_cytokinepos_dat %>% 
  mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
         STIM = factor(STIM, levels = c("Spike 1", "Spike 2", "NCAP", "VEMP")))

stim_labs <- c("S1", "S2", "NCAP", "VEMP") 
names(stim_labs) <- c("Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")

base_tsne_plot <- function(currentColumn, pointSize = 0.2, colorScheme = NA) {
  p <- ggplot(cd4_cytokinepos_dat, aes(x=x, y=y,
                                       colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
                                         factor(!!as.name(currentColumn))
                                         } else {
                                           as.logical(!!as.name(currentColumn))
                                           })) +
    geom_point(shape=20, alpha=0.8, size=pointSize) +
    facet_grid(Cohort ~ STIM, switch="y",
               labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
    theme_bw() +
    theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
          axis.text=element_blank(),
          axis.line=element_blank(),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          strip.text=element_text(face="bold", size=22),
          panel.grid.major = element_blank(),
          legend.title=element_text(face="bold", size=14),
          strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
  if(!anyNA(colorScheme)) {
    p <- p + scale_color_manual(values = colorScheme)
  }
  if(currentColumn %in% c("Batch", "SAMPLE ID")) {
    p <- p + labs(color = currentColumn) +
      guides(colour = guide_legend(override.aes = list(size=10))) +
      theme(legend.title = element_text(size=15),
            legend.text = element_text(size=15),
            legend.position = "bottom") +
      scale_colour_manual(values=as.character(iwanthue(length(unique(cd4_cytokinepos_dat[,currentColumn])))))
  } else {
    p <- p + theme(legend.position = "none")
  }
  p
}

Look for Batch and Patient sub-clustering

base_tsne_plot("Batch")

base_tsne_plot("SAMPLE ID")

Cytokine, Memory, and Activation expression localization

Now visualize the cytokine, memory, and activation expression localization

for(cg in cd4_gates_for_tsne) {
  print(base_tsne_plot(cg, colorScheme = boolColorScheme) +
    labs(title = sprintf("CD4+Cytokine+ t-SNE\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}

Color by degree

table(cd4_cytokinepos_dat$cytokine_degree)
## 
##     1     2     3     4     5 
## 16152  2594  3183  1969   102
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))

currentColumn <- "cytokine_degree"
pointSize <- 0.2
ggplot(cd4_cytokinepos_dat, aes(x=x, y=y, colour=!!as.name(currentColumn))) +
  geom_point(shape=20, alpha=0.8, size=pointSize) +
  facet_grid(Cohort ~ STIM, switch="y",
             labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
  labs(colour="Cytokine\nDegree",
       title="CD4+Cytokine+ t-SNE\nColored by Cytokine Polyfunctionality") +
  theme_bw() +
  theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
        axis.text=element_blank(),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank(),
        strip.text=element_text(face="bold", size=22),
        panel.grid.major = element_blank(),
        legend.title=element_text(face="bold", size=15),
        legend.text = element_text(size=13),
        strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
        legend.position = "bottom") +
  sc

CD4 Observations

  • In terms of cytokine subpopulations which are more prevalent in a particular stim or hospitalization status, it is easier to identify them using COMPASS. But we can interpret the t-SNE results anyway, and we can see that the t-SNE results reflect our COMPASS findings:
    • VEMP looks different from all the rest of the stims, with a large CD107a subpopulation.
    • TNF, IL2, and CD154 are often co-expressed in the S1, S2, and NCAP panels, sometimes with IFNg. You can see that these events are enriched in the convalescent hospitalized patients (the dotplots from the COMPASS follow-up plots make this point more directly).
    • It is helpful to see that IL4/5/13 and IL17 each seem to form their own clusters (away from the TNF/IL2/CD154 clusters) and are seen across all the stims: however, a lot of these events are likely not above background (i.e. also present in the DMSO condition and potentially noise), as evident from the background-corrected dotplots and FACs plots. Memory:
    • Almost all cytokine positive events are CCR7+ (so, TCM or Naive)
    • Most of the TNF, IL2, and CD154 events look CD45RA- (so, likely TCM). In contrast, the CD107a, IL4/5/13, and IL17 clusters seem to be a mix of CD45RA+ (so, probably Naive) and CD45RA-.
  • Activation:
    • There are many tiny localizations of CD38+ expression, but not a lot of signal overall. We should probably visualize these events in a FACs plot to see if it looks real.
    • HLADR: basically nothing.
  • Polyfunctionality: Most of the polyfunctional signal is in the TNF, IL2, and CD154 area. Makes sense. Also a little bit of dual IL17a+IL4/5/13+ as well.
  • Despite the trends being more quantitative in COMPASS, I think t-SNE allows us to see some of the big-picture results very clearly.

Follow-up:
We should plot the memory breakdown of the cytokine+ events in boxplots.
We could also focus in on either the 1) IL2+ or TNF+ or CD154+ events as a group or 2) the polyfunctional COMPASS subsets, and make boxplots about their memory status.
Depending on how informative we think the activation data are, we can do the same for CD38 and HLADR.

Not-CD4

Perform Not-CD4 analysis.

Sample events for t-SNE and QC

notcd4_gates_for_tsne <- c(
  "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154", 
  "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2", 
  "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513", 
  "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF",
  "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+",
  "/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
notcd4_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154", 
                        "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2", 
                        "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513", 
                        "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF")

Still need to add boolean gates.

gs_pop_add(gs, eval(substitute(flowWorkspace::booleanFilter(v),
                                       list(v = as.symbol(paste(notcd4_cytokine_gates, collapse="|"))))),
           parent = "/Time/LD-3+/1419-3+/S/Lymph/NOT4+", name = "NotCD4_CytokinePos")
## [1] 30
recompute(gs) # , "/Time/LD-3+/1419-3+/S/Lymph/NOT4+") # For some reason there is an error if I try to recompute lower in the gating tree
## ..................................................................................................................................................................................................................................................................................................................................................................................................................done!
notcd4_cytokine_pos_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/NotCD4_CytokinePos"

pop_counts <- pData(gs) %>% 
  left_join(gs_pop_get_count_fast(gs, subpopulations = notcd4_cytokine_pos_parentGate),
            by = c("rowname" = "name")) %>% 
  dplyr::rename(NotCD4_CytokinePos = Count) %>% 
  dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, NotCD4_CytokinePos)

37C, 116C (DMSO), and 07B Spike 2 were filtered out for COMPASS due to low cell counts.
37C also has ~2-5% events in Live CD3+ gate. 07B Spike 2 has low-ish 13% Live CD3+ gate membership.
Include them all here, but note that for some of them, the low event counts means they will be under-represented in t-SNE.

head(pop_counts)
##             rowname Batch SAMPLE ID    STIM       Cohort NotCD4_CytokinePos
## 1 112405.fcs_332150     1     15545    DMSO Hospitalized                168
## 2 112407.fcs_341600     1     15545     SEB Hospitalized               7813
## 3 112409.fcs_398100     1     15545    VEMP Hospitalized                218
## 4 112411.fcs_348925     1     15545 Spike 1 Hospitalized                174
## 5 112413.fcs_296775     1     15545 Spike 2 Hospitalized                110
## 6 112415.fcs_378400     1     15545    NCAP Hospitalized                270
sub_group_size_calculations <- pop_counts %>% 
  dplyr::filter(Cohort != "Healthy control" & !is.na(Cohort) & !(STIM %in% c("DMSO", "SEB"))) %>% # TRIMAS are NA
  group_by(Batch, Cohort, STIM) %>% 
  dplyr::summarise(n_Patients = n(),
                   min_NotCD4_CytokinePos = min(NotCD4_CytokinePos),
                   total_NotCD4_CytokinePos = sum(NotCD4_CytokinePos),
                   n_x_min_NotCD4_CytokinePos = n_Patients * min_NotCD4_CytokinePos)
sub_group_size_calculations %>% 
  arrange(n_x_min_NotCD4_CytokinePos) %>% 
  head() %>% 
  knitr::kable()
Batch Cohort STIM n_Patients min_NotCD4_CytokinePos total_NotCD4_CytokinePos n_x_min_NotCD4_CytokinePos
2 Non-hospitalized Spike 2 13 29 1129 377
2 Hospitalized Spike 2 7 68 1649 476
3 Hospitalized Spike 2 7 76 1581 532
1 Hospitalized Spike 2 6 93 694 558
1 Non-hospitalized Spike 2 13 43 2558 559
2 Non-hospitalized NCAP 13 43 2011 559

Above: Based on the n_x_min_NotCD4_CytokinePos column, if we maintained equal representation of the patients within each group (by batch, cohort, and stim) there would be 273 events per group, leading to a total of only 377x3x4 = 4524 events.
The limiting group is: B2, Non-hospitalized, Spike 2

sub_group_size_calculations %>% 
  arrange(total_NotCD4_CytokinePos) %>% 
  head() %>% 
  knitr::kable()
Batch Cohort STIM n_Patients min_NotCD4_CytokinePos total_NotCD4_CytokinePos n_x_min_NotCD4_CytokinePos
1 Hospitalized Spike 2 6 93 694 558
1 Hospitalized Spike 1 6 166 1115 996
2 Non-hospitalized Spike 2 13 29 1129 377
1 Hospitalized NCAP 6 169 1373 1014
3 Hospitalized Spike 2 7 76 1581 532
2 Hospitalized Spike 2 7 68 1649 476

Above: If we allow unequal patient representation, we can have up to 694 events per group.
Let’s settle for 694x3x4 = 8328 events total in the t-SNE run.

notcd4_cytokinepos_sampleSizes <- pop_counts %>%
  dplyr::filter(Cohort != "Healthy control" & !is.na(Cohort) & !(STIM %in% c("DMSO", "SEB"))) %>% 
  group_by(Batch, Cohort, STIM) %>% 
  nest() %>% 
  ungroup() %>% 
  mutate(nsamp = map2(694, data, function(totalEvents, df) {
    distributeEvents(totalEvents, df$NotCD4_CytokinePos)
  })) %>% 
  unnest(cols = c(data, nsamp))

Make sure all groups are the same size

notcd4_cytokinepos_sampleSizes %>% 
  group_by(Batch, Cohort, STIM) %>% 
  summarise(nsamp_sum = sum(nsamp)) %>% 
  dplyr::pull(nsamp_sum) %>% 
  unique()
## [1] 694

But keep in mind that there is lopsided patient representation even within groups:

notcd4_cytokinepos_sampleSizes %>% 
  arrange(nsamp) %>% 
  head(n=10)
## # A tibble: 10 x 7
##    Batch STIM    Cohort        rowname       `SAMPLE ID` NotCD4_CytokineP… nsamp
##    <dbl> <chr>   <chr>         <chr>         <chr>                   <int> <dbl>
##  1     2 Spike 2 Non-hospital… 113721.fcs_2… 161C                       29    29
##  2     2 Spike 2 Non-hospital… 113781.fcs_1… 76C                        37    37
##  3     2 Spike 2 Non-hospital… 113853.fcs_2… 37C                        41    41
##  4     1 Spike 2 Non-hospital… 112821.fcs_1… 113C                       43    43
##  5     2 NCAP    Non-hospital… 113723.fcs_2… 161C                       43    43
##  6     2 Spike 1 Non-hospital… 113671.fcs_1… 59C                        46    46
##  7     2 Spike 2 Non-hospital… 113709.fcs_1… 157C                       46    46
##  8     1 NCAP    Non-hospital… 112823.fcs_1… 113C                       51    51
##  9     1 VEMP    Non-hospital… 112459.fcs_3… 112C                      732    53
## 10     1 VEMP    Non-hospital… 112484.fcs_3… 141C                      223    53
notcd4_cytokinepos_sampleSizes_4plot <- notcd4_cytokinepos_sampleSizes %>%
  mutate(the_label = ifelse(nsamp < 25,
                            paste0("Batch ", Batch, ", ", `SAMPLE ID`, ", ", STIM), NA)) %>% 
  mutate(Cohort = factor(Cohort,
                         levels = c("Non-hospitalized", "Hospitalized"),
                         labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(notcd4_cytokinepos_sampleSizes_4plot,
       aes(factor(Cohort), nsamp, label = the_label)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.15, height = 0) +
  theme_bw(base_size=20) +
  labs(title="ICS Not-CD4 t-SNE patient representation",
       y="Events Sampled for t-SNE") +
  geom_text_repel(size=4) +
  facet_grid(Batch ~ STIM) +
  theme(axis.title.x = element_blank())
## Warning: Removed 236 rows containing missing values (geom_text_repel).

Reproducibility is important

set.seed(date+1)

call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName, currentSampleSize) {
  # print(sprintf("Sampling data from %s", currentSampleName))
  sampleGatingHierarchy(gs[[currentSampleName]], notcd4_cytokine_pos_parentGate, currentSampleSize, notcd4_gates_for_tsne)
}
notcd4_cytokinepos_data4tsne <- map2_dfr(notcd4_cytokinepos_sampleSizes$rowname, notcd4_cytokinepos_sampleSizes$nsamp,
                                     call_sampleGatingHierarchy_for_cd4)
knitr::kable(head(notcd4_cytokinepos_data4tsne))
name EXPERIMENT NAME $DATE SAMPLE ID PATIENT ID STIM WELL ID PLATE NAME filename rowname Sample ID Cohort Age Sex Race Hispanic? Days symptom onset to visit 1 Pair ID Batch /Time/LD-3+/1419-3+/S/Lymph/NOT4+/NotCD4_CytokinePos /Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a /Time/LD-3+/1419-3+/S/Lymph/NOT4+/154 /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2 /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17 /Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513 /Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF /Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+ /Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+ /Time/LD-3+/1419-3+/S/Lymph/CD38+ /Time/LD-3+/1419-3+/S/Lymph/HLADR+ Time FSC-A FSC-H SSC-A SSC-H CD8b BB700 TNFa FITC CD107a PE-Cy7 CD154 PE-Cy5 CD3 ECD IL2 PE CD4 APC-H7 IL17a Ax700 IL4/5/13 APC CD14/CD19 BV785 CCR7 BV711 CD38 BV605 L/D IFNg V450 CD45RA BUV737 HLADR BUV395
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 1 0 0 0 0 0 0 1 1 0 0 61.897 96120.24 84269 19137.39 18121 2197.9023 435.94394 1625.3967 1154.0245 3006.588 522.9092 1063.75098 596.0203 352.0423 1104.1075 2426.293 687.5623 1693.623 582.8617 2949.740 172.7109
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 1 0 0 0 0 0 0 0 0 1 0 29.699 85531.16 71826 20106.57 19686 691.9189 42.50722 1985.0042 710.8685 2844.340 640.4456 1038.77844 1241.4800 617.5194 1017.9365 1424.282 2125.5798 1643.731 435.3643 1390.699 1594.4603
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 1 0 0 0 0 0 0 1 0 0 0 70.564 126265.64 108899 24777.60 24179 765.7809 438.18649 1795.7152 950.7180 2742.777 788.9135 918.41797 699.7520 330.1357 912.4352 2287.560 1213.9369 1583.611 1057.1897 1482.738 452.2636
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 0 0 0 0 1 0 0 1 1 0 0 16.969 90115.48 77125 18524.04 18037 1289.5042 314.98084 183.4656 956.0971 3007.340 707.0035 98.51392 2338.2048 586.7673 1409.6289 3021.614 1674.9799 1135.599 638.1335 2886.849 811.1515
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 0 0 1 0 0 0 0 1 1 0 0 69.022 103021.04 91384 15381.60 14636 2025.3123 814.11292 705.6724 741.4208 2966.372 946.2702 491.29556 1535.7384 657.6039 1546.0616 2823.939 512.0006 1143.431 1950.6909 2615.035 291.3027
112409.fcs 20200528_COVID_ICS-B1 28-MAY-2020 15545 15545 VEMP A06 P2 15545_A6_A06_006.fcs 112409.fcs_398100 NA Hospitalized 63 N/A N/A N/A 47 7 1 1 0 0 0 0 1 0 0 1 0 0 0 32.179 123111.64 107516 26370.57 26124 1169.5574 257.73718 763.4775 596.2213 2761.780 527.0810 711.43835 2161.8057 676.6716 1169.7057 2848.102 1305.6493 1309.418 1014.0414 1405.771 903.6163
dim(notcd4_cytokinepos_data4tsne)
## [1] 16656    52

Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.

cytokine_dominance <- notcd4_cytokinepos_data4tsne %>% 
  group_by(Batch) %>% 
  summarise_at(notcd4_cytokine_gates, sum) %>%
  t() %>%
  as.data.frame() %>%
  set_names(c("B1", "B2", "B3")) %>%
  rownames_to_column("Cytokine_Gate") %>%
  dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
Cytokine_Gate B1 B2 B3
/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a 1403 2189 1709
/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154 257 405 154
/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG 837 849 631
/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2 571 692 505
/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17 2491 778 1860
/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513 793 1472 938
/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF 483 733 547
cytokine_dominance %>%
  pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>% 
  mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>% 
  ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
  theme_bw(base_size=18) +
  geom_bar(stat="identity") +
  facet_grid(. ~ Batch) +
  labs(title = "Not-CD4 t-SNE Run Cytokine Dominance by Batch")

IL17 seems to be dominating Batch 1. We know that there is high IL17 DMSO background, so we’re likely including cells which were not stimulated by an experimental peptide.

Run t-SNE

# t-SNE can take a long time, so there is a rerun_tsne switch
if(rerun_tsne) {
  print("Running t-SNE")
  print(Sys.time())
  notcd4_cytokinepos_tsne_out <- notcd4_cytokinepos_data4tsne %>% 
    # Run CD3, co-receptor, cytokine, memory, and activation markers through t-SNE
    dplyr::select("CD3 ECD", "CD8b BB700", "CD4 APC-H7",
                  "TNFa FITC", "CD107a PE-Cy7", 
                  "CD154 PE-Cy5", "IL2 PE", "IL17a Ax700", 
                  "IL4/5/13 APC", "IFNg V450", 
                  "CCR7 BV711", "CD45RA BUV737",
                  "CD38 BV605", "HLADR BUV395") %>% 
    Rtsne::Rtsne(check_duplicates = FALSE)
  print(Sys.time())
  notcd4_cytokinepos_dat <- cbind(as.data.frame(notcd4_cytokinepos_tsne_out$Y) %>% 
    dplyr::rename(x = V1, y = V2),
    notcd4_cytokinepos_data4tsne)
  if(save_output) {
    print("Loading saved t-SNE run")
    saveRDS(notcd4_cytokinepos_dat, here::here(sprintf("out/tSNE/%s_ICS_NotCD4_CytokinePos_tSNE.rds", date)))
  }
} else {
  # Assuming t-SNE results are already saved
  notcd4_cytokinepos_dat <- readRDS(here::here(sprintf("out/tSNE/%s_ICS_NotCD4_CytokinePos_tSNE.rds", date)))
}
## [1] "Running t-SNE"
## [1] "2020-06-19 15:57:02 PDT"
## [1] "2020-06-19 15:58:12 PDT"
## [1] "Loading saved t-SNE run"

Plot t-SNE results

# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
                   metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
                               here::here("data/Arial_afm/ArialMT-Bold.afm"), 
                               here::here("data/Arial_afm/ArialMT-Italic.afm"),
                               here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)

boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")

notcd4_cytokinepos_dat <- notcd4_cytokinepos_dat %>% 
  mutate(cytokine_degree = rowSums(dplyr::select(., all_of(notcd4_cytokine_gates))))
notcd4_cytokinepos_dat <- notcd4_cytokinepos_dat %>% 
  mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
         STIM = factor(STIM, levels = c("Spike 1", "Spike 2", "NCAP", "VEMP")))

stim_labs <- c("S1", "S2", "NCAP", "VEMP") 
names(stim_labs) <- c("Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")

base_tsne_plot <- function(currentColumn, pointSize = 0.2, colorScheme = NA) {
  p <- ggplot(notcd4_cytokinepos_dat, aes(x=x, y=y,
                                       colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
                                         factor(!!as.name(currentColumn))
                                         } else {
                                           as.logical(!!as.name(currentColumn))
                                           })) +
    geom_point(shape=20, alpha=0.8, size=pointSize) +
    facet_grid(Cohort ~ STIM, switch="y",
               labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
    theme_bw() +
    theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
          axis.text=element_blank(),
          axis.line=element_blank(),
          axis.ticks=element_blank(),
          axis.title=element_blank(),
          strip.text=element_text(face="bold", size=22),
          panel.grid.major = element_blank(),
          legend.title=element_text(face="bold", size=14),
          strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
  if(!anyNA(colorScheme)) {
    p <- p + scale_color_manual(values = colorScheme)
  }
  if(currentColumn %in% c("Batch", "SAMPLE ID")) {
    p <- p + labs(color = currentColumn) +
      guides(colour = guide_legend(override.aes = list(size=10))) +
      theme(legend.title = element_text(size=15),
            legend.text = element_text(size=15),
            legend.position = "bottom") +
      scale_colour_manual(values=as.character(iwanthue(length(unique(notcd4_cytokinepos_dat[,currentColumn])))))
  } else {
    p <- p + theme(legend.position = "none")
  }
  p
}

Look for Batch and Patient sub-clustering

base_tsne_plot("Batch")

base_tsne_plot("SAMPLE ID")

Cytokine, Memory, and Activation expression localization

Now visualize the cytokine, memory, and activation expression localization

for(cg in notcd4_gates_for_tsne) {
  print(base_tsne_plot(cg, colorScheme = boolColorScheme) +
    labs(title = sprintf("NotCD4+Cytokine+ t-SNE\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}

Color by degree

table(notcd4_cytokinepos_dat$cytokine_degree)
## 
##     1     2     3     4     5     6 
## 14270  1456   647   243    38     2
myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
sc <- scale_colour_gradientn(colours = myPalette(100), limits=c(1, 6))

currentColumn <- "cytokine_degree"
pointSize <- 0.2
ggplot(notcd4_cytokinepos_dat, aes(x=x, y=y, colour=!!as.name(currentColumn))) +
  geom_point(shape=20, alpha=0.8, size=pointSize) +
  facet_grid(Cohort ~ STIM, switch="y",
             labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
  labs(colour="Cytokine\nDegree",
       title="NotCD4+Cytokine+ t-SNE\nColored by Cytokine Polyfunctionality") +
  theme_bw() +
  theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
        axis.text=element_blank(),
        axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_blank(),
        strip.text=element_text(face="bold", size=22),
        panel.grid.major = element_blank(),
        legend.title=element_text(face="bold", size=15),
        legend.text = element_text(size=13),
        strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
        legend.position = "bottom") +
  sc

Color t-SNE plots by CD4 and CD8 mfi

Since the Not-CD4 gate contains both CD4-high, CD4-low, CD8-high, and CD8-low events.

notcd4_cytokinepos_dat %>%
  ggplot(aes(x=`CD4 APC-H7`, color=factor(Batch), fill=factor(Batch))) +
  geom_histogram(alpha=0.2, position="identity") +
  geom_vline(xintercept = -1000) +
  geom_vline(xintercept = 2500)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

notcd4_cytokinepos_dat %>%
  ggplot(aes(x=`CD8b BB700`, color=factor(Batch), fill=factor(Batch))) +
  geom_histogram(alpha=0.2, position="identity") +
  geom_vline(xintercept = -3000) +
  geom_vline(xintercept = 7000)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

map2(.x = c("CD4 APC-H7", "CD8b BB700"),
     .y = list(c(-1000, 2500), c(-2500, 6500)),
     .f = function(currentColumn, currentLimits) {
        pointSize <- 0.4
        print(ggplot(notcd4_cytokinepos_dat, aes(x=x, y=y, colour=!!as.name(currentColumn))) +
          geom_point(shape=20, alpha=0.8, size=pointSize) +
          facet_grid(Cohort ~ STIM, switch="y",
                     labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
          labs(colour=currentColumn,
               title=sprintf("NotCD4+Cytokine+ t-SNE\nColored by %s mfi", currentColumn)) +
          theme_bw() +
          theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
                axis.text=element_blank(),
                axis.line=element_blank(),
                axis.ticks=element_blank(),
                axis.title=element_blank(),
                strip.text=element_text(face="bold", size=22),
                panel.grid.major = element_blank(),
                legend.title=element_text(face="bold", size=15),
                legend.text = element_text(size=6),
                strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")),
                legend.position = "bottom") +
          # values lower or higher than the limits get converted to min(limits) and max(limits), respectively
          scale_colour_gradient(low = "red", high = "green", oob=squish, limits=currentLimits))
     })

## [[1]]

## 
## [[2]]

Not-CD4 Observations:

  • Differences across Stims:
    • CD107a more hghly present in VEMP (confirmed by COMPASS)
    • CD107a+IFNg+TNF+ less expressed in VEMP (confirmed by COMPASS)
    • IFNg and TNF highest in NCAP (confirmed by COMPASS)
    • IL2 less expressed in VEMP
    • IL17 and IL4/5/13 seen all stims (but unfortunately much of this is likely noise, as previously discussed)
  • By hospitalization status:
    • No big differences. The COMPASS follow-up dotplots also did not show any significant difference in subpopulation background-corrected magnitudes by status.
  • Memory
    • Unlike in CD4s where CCR7 was expressed by a majority events, CCR7 is probably expressed in ~50% of Not-CD4 events.
    • CD45RA looks like it’s expressed by ~40-50% of events, a little higher than with CD4s, but less localized.
  • Activation
    • Smattering of CD38
    • Only a tiny bit of HLA-DR (looks like it might be CD107a or IL2 positive)